{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Watch the Trained Agent"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1.Start the Environment for Trained Agent"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:unityagents:\n",
      "'Academy' started successfully!\n",
      "Unity Academy name: Academy\n",
      "        Number of Brains: 1\n",
      "        Number of External Brains : 1\n",
      "        Lesson number : 0\n",
      "        Reset Parameters :\n",
      "\t\tgoal_speed -> 1.0\n",
      "\t\tgoal_size -> 5.0\n",
      "Unity brain name: ReacherBrain\n",
      "        Number of Visual Observations (per agent): 0\n",
      "        Vector Observation space type: continuous\n",
      "        Vector Observation space size (per agent): 33\n",
      "        Number of stacked Vector Observation: 1\n",
      "        Vector Action space type: continuous\n",
      "        Vector Action space size (per agent): 4\n",
      "        Vector Action descriptions: , , , \n"
     ]
    }
   ],
   "source": [
    "from unityagents import UnityEnvironment\n",
    "import numpy as np\n",
    "import torch\n",
    "from ddpg_agent import Agent\n",
    "\n",
    "env = UnityEnvironment(file_name='Reacher_Windows_x86_64/Reacher.exe')\n",
    "agent = Agent(33, 4, 8)\n",
    "\n",
    "# get the default brain\n",
    "brain_name = env.brain_names[0]\n",
    "brain = env.brains[brain_name]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2. Prepare Player"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def play(agent, episodes=5):\n",
    "    action_size=4\n",
    "    num_agents=20\n",
    "    for i_episode in range(episodes):\n",
    "        env_info = env.reset(train_mode=False)[brain_name]     # reset the environment    \n",
    "        states = env_info.vector_observations                  # get the current state (for each agent)\n",
    "        scores = np.zeros(num_agents)                          # initialize the score (for each agent)\n",
    "        while True:\n",
    "            actions = np.random.randn(num_agents, action_size) # select an action (for each agent)\n",
    "            actions = agent.act(states, add_noise=False)       # all actions between -1 and 1\n",
    "            env_info = env.step(actions)[brain_name]           # send all actions to tne environment\n",
    "            next_states = env_info.vector_observations         # get next state (for each agent)\n",
    "            rewards = env_info.rewards                         # get reward (for each agent)\n",
    "            dones = env_info.local_done                        # see if episode finished\n",
    "            scores += env_info.rewards                         # update the score (for each agent)\n",
    "            states = next_states                               # roll over states to next time step\n",
    "            if np.any(dones):                                  # exit loop if episode finished\n",
    "                break\n",
    "            #break\n",
    "        print('Episode: {} Average Score (over agents): {}'.format(i_episode, np.mean(scores)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 3. Play Before  Training"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Episode: 0 Average Score (over agents): 0.0\n",
      "Episode: 1 Average Score (over agents): 0.0\n"
     ]
    }
   ],
   "source": [
    "play(agent, episodes=2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 4. Load Trained Weights"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load(agent, actor_file, critic_file):\n",
    "    agent.actor_local.load_state_dict(torch.load(actor_file))\n",
    "    agent.actor_target.load_state_dict(torch.load(actor_file))\n",
    "    agent.critic_local.load_state_dict(torch.load(critic_file))\n",
    "    agent.critic_target.load_state_dict(torch.load(critic_file))\n",
    "    \n",
    "load(agent, 'checkpoint_actor.pth', 'checkpoint_critic.pth')   "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 5. Play After Training"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Episode: 0 Average Score (over agents): 38.78149913316592\n",
      "Episode: 1 Average Score (over agents): 38.71349913468585\n",
      "Episode: 2 Average Score (over agents): 38.77949913321063\n"
     ]
    }
   ],
   "source": [
    "play(agent, episodes=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "env.close()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:Anaconda_3]",
   "language": "python",
   "name": "conda-env-Anaconda_3-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
